Setting up the R environment

Installing packages and loading the library

# Install packages
paket <- function(pak){
  new_pak <- pak[!(pak %in% rownames(installed.packages()))]
  if (length(new_pak)) 
    install.packages(new_pak, dependencies = TRUE,repos="https://cloud.r-project.org/")
  sapply(pak, library, character.only = TRUE)
}
listOfPackages <- c("tidyverse", "RColorBrewer", "knitr", "kableExtra", "tsModel", "gridExtra", "dplyr", "metafor", "meta", "viridis")
paket(listOfPackages)

R session information

sessionInfo()
# options(repr.plot.width = 18, repr.plot.height = 9)
theme_plots <- theme_bw() +
  theme(strip.text = element_text(size = 5),
        axis.text.x = element_text(size = 8), 
        axis.text.y = element_text(size = 6), 
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        title = element_text(size = 10),
        plot.subtitle = element_text(size = 9, face = "italic")) 
theme_set(theme_plots)

# Colorblind palette
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# cut-off dates used in the analysis, these should not be changed
time_period <-"months" #possible values: days, weeks, months
history_start_date <- as.Date("2019-01-01")
history_end_date <- as.Date("2021-05-31")
pandemic_start_date <- as.Date("2020-03-15")
start_date_plots <- as.Date("2019-01-15")
end_date_plots <- as.Date("2021-05-01")

Read the input file by site

Each site shared the aggregated counts results. Putting all the results together.

outputDir <- "../../output/"
outputFiles <- list.files( path = outputDir, pattern = "\\.RData")
country_map <- read.delim( file = paste0( outputDir, "country_mapping.txt"))
getwd()
## [1] "/Users/alba/Desktop/Phase2.2.psy_peds_trends/metanalysis_visualization"

Put all the aggregated counts from the different sites together

for( i in 1:length( outputFiles )){
  load(paste0( outputDir, outputFiles[i]))
  if( i == 1 ){
    count_icd_all <- count_icd
    patients_hospitalized_agg_sex_perc_all <- patients_hospitalized_agg_sex_perc
    patient_count_psy_period_psy_all <- patient_count_psy_period_psy
    perc_disorder_group_all <- perc_disorder_group
    ratios_patients_with_without_psy_all <- ratios_patients_with_without_psy
  }else{
    count_icd_all <- rbind(count_icd_all, count_icd)
    patients_hospitalized_agg_sex_perc_all <- rbind( patients_hospitalized_agg_sex_perc_all, patients_hospitalized_agg_sex_perc)
    patient_count_psy_period_psy_all <- rbind( patient_count_psy_period_psy_all,patient_count_psy_period_psy)
    perc_disorder_group_all <- rbind(perc_disorder_group_all, perc_disorder_group)
    ratios_patients_with_without_psy_all <- rbind( ratios_patients_with_without_psy_all, ratios_patients_with_without_psy)
  }
  rm(count_icd,patients_hospitalized_agg_sex_perc,patient_count_psy_period_psy,
     perc_disorder_group,ratios_patients_with_without_psy,
     patient_count_psy_period_psy_clear, table1, bootstrapped_coefficients_df,
     bootstrapped_fitted_df, length_hospitalisation_values)
}
## Warning in rm(count_icd, patients_hospitalized_agg_sex_perc,
## patient_count_psy_period_psy, : object 'length_hospitalisation_values' not found
## Warning in rm(count_icd, patients_hospitalized_agg_sex_perc,
## patient_count_psy_period_psy, : object 'bootstrapped_coefficients_df' not found
## Warning in rm(count_icd, patients_hospitalized_agg_sex_perc,
## patient_count_psy_period_psy, : object 'bootstrapped_fitted_df' not found

Percentage: patients with psychiatric conditions / total number of patients

By site

ratios_patients_with_without_psy_all %>%
  filter(time_p <= history_end_date &
         time_p >= history_start_date ) %>%
  ggplot( aes(x=time_p, y=percentage, 
              group = siteid, color = siteid))+
  geom_point( aes(shape=siteid, color=siteid) )+
    geom_line() +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
  scale_fill_viridis(discrete = TRUE, alpha=0.6) +
    theme(
      legend.position="bottom",
      plot.title = element_text(size=11)
    ) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
    ggtitle("Percentage: patients with psychiatric conditions / total number of patients") +
    xlab("period (months)")

By Country

Showing each independent site

ratios_patients_with_without_psy_all_country <- left_join( ratios_patients_with_without_psy_all, country_map )
## Joining, by = "siteid"
ratios_patients_with_without_psy_all_country %>%
    filter(time_p <= history_end_date &
         time_p >= history_start_date )  %>%
  ggplot(aes(x = time_p, y = percentage, fill = siteid, color = siteid)) +
  geom_point( aes(shape=siteid, color=siteid)  ) +
  geom_line() +
  facet_grid(country~.) +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
  scale_fill_manual(values = cbPalette) +
  scale_color_manual(values = cbPalette) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month", limits = c( history_start_date, history_end_date  )) +
  labs(y = "Ratio (with psy conditions/without psy conditions)",
       x = paste0("Date (by ", time_period,")"),
       title = paste0("Percentage: patients with psychiatric conditions / total number of patients ( per ", time_period,")"))

Aggregating the sites in each country

ratios_patients_with_without_psy_all_country <- left_join( ratios_patients_with_without_psy_all, country_map )
## Joining, by = "siteid"
ratios_patients_with_without_psy_aggregated <- ratios_patients_with_without_psy_all_country %>%
  group_by( time_p, country ) %>%
  mutate( no_psy = sum( count_no_psy_patients ), 
          psy = sum( count_psy_patients ), 
          total = sum(count_no_psy_patients) + sum(count_psy_patients), 
          ratio_by_country = psy / no_psy, 
          perc_by_country = 100*(psy / total) ) %>%
  select( time_p, no_psy, psy, total, ratio_by_country, perc_by_country, country)

ratios_patients_with_without_psy_aggregated %>%
  filter(time_p <= history_end_date &
         time_p >= history_start_date ) %>%
  ggplot( aes(x=time_p, y=perc_by_country, 
              group = country, color = country))+
  geom_point( aes(shape=country, color=country) )+
    geom_line() +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
  scale_colour_viridis_d() +
    theme(
      legend.position="bottom",
      plot.title = element_text(size=11)
    ) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
    ggtitle(paste0("Percentage: patients with psychiatric conditions / total number of patients ( per ", time_period,") \n (aggregated by country)")) +
    xlab("period (months)") +
    ylab("percentage (%)")

Ratio: patients with vs. patients without psychiatric conditions

By site

ratios_patients_with_without_psy_all %>%
  filter(time_p <= history_end_date &
         time_p >= history_start_date ) %>%
  ggplot( aes(x=time_p, y=ratio, 
              group = siteid, color = siteid))+
  geom_point( aes(shape=siteid, color=siteid) ) + 
  geom_line() +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
  scale_fill_viridis(discrete = TRUE, alpha=0.6) +
    theme(
      legend.position="bottom",
      plot.title = element_text(size=11)
    ) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
    ggtitle("Ratio: patients with vs. patients without psychiatric conditions") +
    xlab("period (months)")

By Country

Showing each site per country

ratios_patients_with_without_psy_all_country %>%
    filter(time_p <= history_end_date &
         time_p >= history_start_date ) %>%
  ggplot(aes(x = time_p, y = ratio, fill = siteid, color = siteid)) +
  geom_point( aes(shape=siteid, color=siteid)  ) +
  geom_line() +
  facet_grid(country~.) +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
  scale_fill_manual(values = cbPalette) +
  scale_color_manual(values = cbPalette) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month", limits = c( history_start_date, history_end_date  )) +
  labs(y = "Ratio (with psy conditions/without psy conditions)",
       x = paste0("Date (by ", time_period,")"),
       title = paste0("Ratio patient with vs without psychiatric conditions ( per ", time_period,")"))

Aggregating the values of the sites per country

ratios_patients_with_without_psy_aggregated %>%
  filter(time_p <= history_end_date &
         time_p >= history_start_date ) %>%
  ggplot( aes(x=time_p, y=ratio_by_country, 
              group = country, color = country))+
  geom_point( aes(shape=country, color=country) )+
    geom_line() +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
  scale_colour_viridis_d() +
    theme(
      legend.position="bottom",
      plot.title = element_text(size=11)
    ) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
    ggtitle("Ratios: patients with psychiatric conditions / without psychiatric conditions \n (aggregated by country)") +
    xlab("period (months)") +
    ylab("ratio")

Meta-analysis percentage

#split this into before and after pandemic and run it twice (one for each subset)

#estimate the summary effect size
ratios_patients_with_without_psy_all$total <- ratios_patients_with_without_psy_all$count_no_psy_patients + ratios_patients_with_without_psy_all$count_psy_patients

ratios_bp <- ratios_patients_with_without_psy_all %>%
  filter( period == "before_pandemic")

ratios_dp <- ratios_patients_with_without_psy_all %>%
  filter( period == "during_pandemic")

metainputdata <- ratios_dp
#RR no transformation
#PLO the logit transformation
#PFT the double arcsine transformation
ies=escalc(xi=count_psy_patients, ni=total, data=metainputdata, measure="PR")
summary(ies$vi)

# pool the individual effect size
# DL random effects using the DerSimonian-Laird estimator
#REML random effects using the restricted maximum-likelihood estimator
pes = rma(yi, vi, data=ies, method="REML")
print(pes)
confint(pes)

pes.summary=metaprop(count_psy_patients, total, as.character(time_p),
                     data=metainputdata, 
                     sm="PRAW")
forest(pes.summary,layout = "JAMA")

Boxplot showing ratio per month

ratios_patients_with_without_psy_all %>%
  ggplot( aes(x=as.factor(time_p), y=ratio)) +
    geom_boxplot() +
    scale_fill_viridis(discrete = TRUE, alpha=0.6) +
    geom_jitter(color="black", size=0.4, alpha=0.9,
                aes(shape = siteid)) +
    theme(
      legend.position="bottom",
      plot.title = element_text(size=11)
    ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
    ggtitle("Ratio boxplot") +
    xlab("")

Percentage of patients by disorder group

perc_disorder_group_all %>%
    filter(time_p <= history_end_date &
         time_p >= history_start_date &
           disorder_group %in% c("Anxiety Disorders", "Depressive Disorders", "Suicide or Self-Injury" )) %>%
  ggplot(aes(x = time_p, y = percentage_dg, fill = siteid, color = siteid)) +
  geom_point( aes(shape=siteid, color=siteid)  ) +
  geom_line() +
  facet_grid(. ~ disorder_group) +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
  scale_fill_manual(values = cbPalette) +
  scale_color_manual(values = cbPalette) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month", limits = c( history_start_date, history_end_date  )) +
  labs(y = "Percentage of patients (%)",
       x = paste0("Date (by ", time_period,")"),
       title = paste0("Percentage of patients by disorder group \n ( per ", time_period,")"))

perc_disorder_group_all %>%
    filter(time_p <= history_end_date &
         time_p >= history_start_date &
          #disorder_group %in% c("Anxiety Disorders", "Depressive Disorders", "Suicide or Self-Injury" )) %>%
          disorder_group %in% c("Suicide or Self-Injury" )) %>%
  ggplot(aes(x = time_p, y = percentage_dg, fill = siteid, color = siteid)) +
  geom_point( aes(shape=siteid, color=siteid)  ) +
  geom_line() +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
  scale_fill_manual(values = cbPalette) +
  scale_color_manual(values = cbPalette) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month", limits = c( history_start_date, history_end_date  )) +
  labs(y = "Percentage of patients (%)",
       x = paste0("Date (by ", time_period,")"),
       title = paste0("Percentage of patients with Suicide or Self-Injury \n ( per ", time_period,")"))

Sex ratio per site

sex_ratio_by_site <- patients_hospitalized_agg_sex_perc_all %>%
  filter( sex == "female") %>%
  group_by( time_p, siteid ) %>%
  mutate( female_count = count_sex, 
          male_count = count - count_sex, 
          sex_ratio_by_site = female_count / male_count ) %>%
  select( time_p, female_count, male_count, sex_ratio_by_site, siteid)

sex_ratio_by_site %>%
    filter(time_p <= history_end_date &
         time_p >= history_start_date) %>%
  ggplot(aes(x = time_p, y = sex_ratio_by_site, fill = siteid, color = siteid)) +
  geom_point( aes(shape=siteid, color=siteid)  ) +
  geom_line() +
  geom_vline(xintercept = as.Date(pandemic_start_date),
             linetype = "dashed") +
   geom_hline(yintercept = 1,
             linetype = "dashed", color = "red") +
  scale_fill_manual(values = cbPalette) +
  scale_color_manual(values = cbPalette) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  scale_x_date(date_labels = "%b-%d-%Y", breaks = "month", limits = c( history_start_date, history_end_date  )) +
  labs(y = "sex ratio (female/male)",
       x = paste0("Date (by ", time_period,")"),
       title = paste0("Sex ratio of patients with mental disorders \n ( per ", time_period,")"))